import scrapy
from scrapy.http import Request

from ..items import HunterItem


class SingleWeb(scrapy.Spider):
    """Spider that scrapes a single forum thread page on ylie51.com.

    Extracts the thread URL, starter's author name, post time, title,
    first-post content and attached image URLs into one ``HunterItem``.
    """

    # NOTE(review): looks like a typo for 'SingleWeb', but this is the
    # spider's public identifier (`scrapy crawl SngleWeb`), so it is kept.
    name = 'SngleWeb'
    start_urls = ["http://www.ylie51.com/thread-7553-1-2.html"]

    def start_requests(self):
        """Issue the start requests with dupe-filtering disabled so the
        thread page is fetched even if it was already seen."""
        for url in self.start_urls:
            yield Request(url, dont_filter=True)

    def parse(self, response):
        """Parse one thread page and yield a populated ``HunterItem``.

        Raises IndexError if the page layout changes and an expected
        element is missing (original behavior, deliberately kept so a
        broken selector fails loudly rather than yielding partial items).
        """
        item = HunterItem()
        item["url"] = response.url
        # The XPath matches every reply author; index 0 is the thread starter.
        item["author"] = response.xpath("//div[@class='authi']/a/text()").extract()[0]
        # [4:] strips a fixed-width leading prefix from the timestamp text
        # (presumably the "发表于 " / "posted at" label — TODO confirm on a live page).
        item["time"] = response.xpath("//div[@class='authi']/em/text()").extract()[0][4:]
        item["title"] = response.xpath("//*[@id='thread_subject']/text()").extract()[0]
        # The XPath matches all post bodies on the page; per the original
        # author's note, index 1 is the opening poster's content.
        item["content"] = response.xpath("//td[@class='t_f']/text()").extract()[1]

        # Resolve image URLs against the response URL with urljoin instead of
        # hard-coding the site root: naive concatenation produced a doubled
        # slash for root-relative paths ("http://www.ylie51.com//data/...")
        # and mangled already-absolute URLs.
        img_urls = response.xpath("//img[@class='guestviewthumb_cur']/@makefile").extract()
        item["img_urls"] = ','.join(response.urljoin(u) for u in img_urls)
        yield item